1. Gastos (cálculos antiguos)

Gastos_casa %>% 
  dplyr::select(-Tiempo,-link) %>%
  dplyr::select(fecha, gasto, monto, gastador,obs) %>% tail(30) %>% 
  knitr::kable(format = "markdown", size=12)
fecha gasto monto gastador obs
17/11/2025 VTR 22000 Andrés NA
17/11/2025 Electricidad 42000 Andrés NA
22/11/2025 Comida 6800 Tami Helado/Galleta Valpo
23/11/2025 Comida 48730 Tami Almuerzo Valpo
23/11/2025 Comida 9990 Tami Cafetería/Sanguche Valpo
24/11/2025 Transporte 5195 Tami Uber Reñaca-Terminal Buses Viña
24/11/2025 Transporte 2675 Tami Uber Depto-Mcdonald Reñaca
22/11/2025 Transporte 8193 Tami Uber Viña-Depto Reñaca
22/11/2025 Transporte 9766 Tami Uber depto Reñaca-Valpo
24/11/2025 Transporte 10000 Andrés condor
24/11/2025 Comida 38290 Andrés barra central
23/11/2025 Comida 17200 Andrés NA
23/11/2025 Comida 18766 Andrés NA
20/11/2025 Comida 22258 Andrés lider
24/11/2025 Comida 55094 Tami Supermercado
26/11/2025 Comida 50000 Andrés piwen
29/11/2025 Comida 108909 Tami Supermercado
4/12/2025 Agua 21244 Andrés NA
8/12/2025 Comida 81926 Tami Supermercado
13/12/2025 Comida 74439 Tami Supermercado
20/12/2025 Comida 74418 Tami Supermercado
23/12/2025 Otros 10000 Andrés basureros navidad
24/12/2025 Electricidad 44000 Andrés NA
24/12/2025 Agua 22000 Andrés NA
28/12/2025 VTR 22000 Andrés NA
29/12/2025 Comida 25780 Andrés NA
30/12/2025 Comida 64932 Tami Supermercado
3/1/2026 Comida 76172 Tami Supermercado
31/3/2019 Comida 9000 Andrés NA
8/9/2019 Comida 24588 Andrés Super Lider

#para ver las diferencias depués de la diosi
Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
    dplyr::group_by(gastador, fecha,.drop = F) %>% 
    dplyr::summarise(gasto_media=mean(monto,na.rm=T)) %>% 
    dplyr::mutate(treat=ifelse(fecha>"2019-W26",1,0)) %>%
    #dplyr::mutate(fecha_simp=lubridate::week(fecha)) %>%#después de  diosi. Junio 24, 2019 
    dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés"))) %>% 
    assign("ts_gastos_casa_week_treat", ., envir = .GlobalEnv) 

gplots::plotmeans(gasto_media ~ gastador_nombre, main="Promedio de gasto por gastador", data=ts_gastos_casa_week_treat,ylim=c(0,75000), xlab="", ylab="")

library(ggplot2)

ggplot(
  ts_gastos_casa_week_treat,
  aes(x = gastador_nombre, y = gasto_media)
) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_wrap(~ treat, labeller = labeller(
    treat = c(`0` = "Antes de Diosi", `1` = "Después de Diosi")
  )) +
  coord_cartesian(ylim = c(0, 70000)) +
  labs(x = "", y = "") +
  theme_minimal(base_size = 13) +
  theme(
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

library(ggiraph)
library(scales)
#if( requireNamespace("dplyr", quietly = TRUE)){
gg <- Gastos_casa %>%
  dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
  dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
  dplyr::mutate(fecha_simp=tsibble::yearweek(fecha)) %>%
  dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
  dplyr::mutate(treat=ifelse(fecha_week>"2019 W26",1,0)) %>%
  dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés"))) %>% 
#  dplyr::mutate(week=as.Date(as.character(lubridate::floor_date(fecha, "week"))))%>%
  #dplyr::mutate(fecha_week= lubridate::parse_date_time(fecha_week, c("%Y-W%V"),exact=T)) %>% 
  dplyr::group_by(gastador_nombre, fecha_simp) %>%
  dplyr::summarise(monto_total=sum(monto)) %>%
  dplyr::mutate(tooltip= paste0(substr(gastador_nombre,1,1),"=",round(monto_total/1000,2))) %>%
  ggplot(aes(hover_css = "fill:none;")) +#, ) +
  #stat_summary(geom = "line", fun.y = median, size = 1, alpha=0.5, aes(color="blue")) +
  geom_line(aes(x = fecha_simp, y = monto_total, color=as.factor(gastador_nombre)),size=1,alpha=.5) +
                       ggiraph::geom_point_interactive(aes(x = fecha_simp, y = monto_total, color=as.factor(gastador_nombre),tooltip=tooltip),size = 1) +
  #geom_text(aes(x = fech_ing_qrt, y = perc_dup-0.05, label = paste0(n)), vjust = -1,hjust = 0, angle=45, size=3) +
 # guides(color = F)+
  theme_custom() +
  geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
  labs(y="Gastos (en miles)",x="Semanas y Meses", subtitle="Interlineado, incorporación de la Diosi; Azul= Tami; Rojo= Andrés") + ggtitle( "Figura 4. Gastos por Gastador") +
  scale_y_continuous(labels = f <- function(x) paste0(x/1000)) + 
  scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
  tsibble::scale_x_yearweek(date_breaks = "1 month", minor_breaks = "1 week", labels=scales::date_format("%m/%y")) +
  theme(axis.text.x = element_text(vjust = 0.5,angle = 35), legend.position='bottom')+
     theme(
    panel.border = element_blank(), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), 
    axis.line = element_line(colour = "black")
    )

#  x <- girafe(ggobj = gg)
#  x <- girafe_options(x = x,
#                      opts_hover(css = "stroke:red;fill:orange") )
#  if( interactive() ) print(x)

#}
tooltip_css <- "background-color:gray;color:white;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"

#ggiraph(code = {print(gg)}, tooltip_extra_css = tooltip_css, tooltip_opacity = .75 )

x <- girafe(ggobj = gg)
x <- girafe_options(x,
  opts_zoom(min = 1, max = 3), opts_hover(css =tooltip_css))
x
plot<-Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(month=as.Date(as.character(lubridate::floor_date(fecha, "month"))))%>%
    dplyr::group_by(month)%>%
    dplyr::summarise(gasto_total=sum(monto)/1000) %>%
      ggplot2::ggplot(aes(x = month, y = gasto_total)) +
      geom_point()+
      geom_line(size=1) +
      theme_custom() +
      geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
      geom_vline(xintercept = as.Date("2019-03-23"),linetype = "dashed", color="red") +
      labs(y="Gastos (en miles)",x="Meses/Año", subtitle="Interlineado, incorporación de la Diosi") + 
      ggtitle( "Figura. Suma de Gastos por Mes") +        
      scale_x_date(breaks = "1 month", minor_breaks = "1 month", labels=scales::date_format("%m/%y")) +
      theme(axis.text.x = element_text(vjust = 0.5,angle = 45)) 
plotly::ggplotly(plot)  
plot2<-Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
    dplyr::group_by(day)%>%
    summarise(gasto_total=sum(monto)/1000) %>%
      ggplot2::ggplot(aes(x = day, y = gasto_total)) +
      geom_line(size=1) +
      theme_custom() +
      geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
      geom_vline(xintercept = as.Date("2020-03-23"),linetype = "dashed", color="red") +
      labs(y="Gastos (en miles)",x="Meses/Año", subtitle="Interlineado, incorporación de la Diosi") + 
      ggtitle( "Figura. Suma de Gastos por Día") +        
      scale_x_date(breaks = "1 month", minor_breaks = "1 week", labels=scales::date_format("%m/%y")) +
      theme(axis.text.x = element_text(vjust = 0.5,angle = 45)) 
plotly::ggplotly(plot2)  
tsData <- Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
    dplyr::group_by(day)%>%
    summarise(gasto_total=sum(monto))%>%
    dplyr::mutate(covid=case_when(day>as.Date("2019-06-02")~1,TRUE~0))%>%
    dplyr::mutate(covid=case_when(day>as.Date("2020-03-10")~covid+1,TRUE~covid))%>%
    dplyr::mutate(covid=as.factor(covid))%>%
  data.frame()
tsData_gastos <-ts(tsData$gasto_total, frequency=7)
mstsData_gastos <- forecast::msts(Gastos_casa$monto, seasonal.periods=c(7,30))

tsData_gastos = decompose(tsData_gastos)

tsdata_gastos_trend<-cbind(tsData,trend=as.vector(tsData_gastos$trend))%>% na.omit()

# Assuming your time series starts on "2019-03-03"
start_date <- as.Date("2019-03-03")
frequency <- 7  # Weekly data
num_periods <- length(tsData_gastos$x)  # Total number of periods in your time series

# Generate sequence of dates
dates <- tsData$day# seq.Date(from = start_date, by = "day", length.out = num_periods)

# Create a data frame from the decomposed time series object
tsData_gastos_df <- data.frame(
  day = dates,
  Actual = as.numeric(tsData_gastos$x),
  Seasonal = as.numeric(tsData_gastos$seasonal),
  Trend = as.numeric(tsData_gastos$trend),
  Random = as.numeric(tsData_gastos$random)
)

tsData_gastos_long <- tsData_gastos_df %>%
  pivot_longer(cols = c("Actual", "Seasonal", "Trend", "Random"), 
               names_to = "Component", values_to = "Value")

# Plotting with facet_wrap
ggplot(tsData_gastos_long, aes(x = day, y = Value)) +
  geom_line() +
  theme_bw() + 
  labs(title = "Descomposición de los Gastos Diarios", x = "Date", y = "Value") +
  scale_x_date(date_breaks = "3 months", date_labels = "%m %Y") +
  facet_wrap(~ Component, scales = "free_y", ncol=1) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  theme(strip.text = element_text(size = 12))

Ahora con las tendencias descompuestas

#require(zoo)
Gastos_casa_gastador_cat_gasto<- 
Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha2=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
    dplyr::mutate(treat=ifelse(fecha2>"2019-W26",1,0)) %>% 
    dplyr::mutate(
      # 1) Normalización básica de texto
      gasto_clean = gasto %>%
        stringr::str_squish() %>%                   # espacios extra
        stringr::str_to_lower() %>%                 # todo en minúsculas
        stringr::str_replace_all(",", "") %>%       # quita comas, si hubiera
        stringr::str_replace_all("\\s+/\\s+", "/") %>%  # "a / b" -> "a/b"
        stringi::stri_trans_general("Latin-ASCII"),  # sin tildes
      
      # 2) Unificar etiquetas que son lo mismo
      gasto_clean = dplyr::case_when(
        gasto_clean %in% c("uber", "uber reñaca", "uber matri cony", "uber cumple papa") ~ "uber",
        gasto_clean %in% c("yaz", "yaz ") ~ "yaz",
        gasto_clean %in% c("limpieza alfombras", "limpieza alfombra") ~ "limpieza alfombras",
        gasto_clean %in% c("vacuna influenza", "vacunas influenza") ~ "vacuna influenza",
        gasto_clean %in% c("aspiradora", "aspiradora ") ~ "aspiradora",
        gasto_clean %in% c("plata reciclaje y basurero",
                           "plata basurero", "plata basureros",
                           "plata fiestas patrias basureros",
                           "aporte basureros", "basureros") ~ "basureros",
        gasto_clean %in% c("donaciones", "donacion") ~ "donacion",
        gasto_clean %in% c("prestamo", "prestamo andres", "prestamo andres", "prestamo andres ") ~ "prestamo andres",
        TRUE ~ gasto_clean
      ),
      # 3) (Opcional) Variable más agregada de tipo de gasto
      gasto_cat = dplyr::case_when(
        gasto_clean == "comida"                       ~ "Comida",
        gasto_clean %in% c("enceres")                 ~ "Enceres",
        gasto_clean == "diosi"                        ~ "Diosi",
        gasto_clean %in% c("vtr", "netflix",
                           "crunchyroll", "entel")    ~ "Streaming/Telefonia",
        gasto_clean %in% c("electricidad", "gas",
                           "parafina", "kerosen")     ~ "Servicios básicos",
        gasto_clean %in% c("agua")                    ~ "Agua",
        gasto_clean %in% c("uber", "transporte",
                           "gas/bencina", "bencina + tag",
                           "bencina reñaca",
                           "bencina + peajes maite",
                           "bencina + tag cumple delox") ~ "Transporte",
        gasto_clean %in% c("farmacia", "remedio",
                           "remedios", "remedios covid",
                           "gaviscón y paracetamol",
                           "nexium")                 ~ "Farmacia/Salud",
        gasto_clean %in% c("electrodomesticos/mantencion casa",
                           "microondas regalo", "aspiradora",
                           "muebles ratan", "mouse",
                           "reloj")                  ~ "Electrodomesticos",
        TRUE ~ stringr::str_to_title(gasto_clean)  # default: limpio pero no agregado
      )
    ) %>%
    dplyr::mutate(
      gasto_cat10 = dplyr::case_when(
        # 1) COMIDA (incluye los “comida” especiales de tu criterio viejo)
        gasto_cat %in% c(
          "Comida",
          "Pan Pepperino",
          "Cookidoo",
          "Granola Wild Foods",
          "Wild Protein",
          "Brussels"
        ) ~ "Comida",
        # 2) ENCERES
        gasto_cat %in% c(
          "Enceres",
          "Incoludido",
          "Tres Toques"
        ) ~ "Enceres",
        # 3) DIOSI
        gasto_cat == "Diosi" ~ "Diosi",
        # 4) SERVICIOS BÁSICOS (agua, luz, gas/bencina, etc.)
        gasto_cat %in% c(
          "Servicios básicos",
          "Agua",
          "Bencina Renaca"
        ) ~ "Servicios básicos",
        # 5) FARMACIA (todo lo que en tu criterio iba a Farmacia)
        gasto_cat %in% c(
          "Farmacia/Salud",
          "Yaz",
          "Gaviscon Y Paracetamol",
          "Vacuna Influenza",
          "Cruz Verde"
        ) ~ "Farmacia",
        # 6) TRANSPORTE (uber, viajes, bencina que antes era Transporte)
        gasto_cat %in% c(
          "Transporte",
          "Uber Renaca",
          "Viaje Brasil"
        ) ~ "Transporte",
        # 7) ELECTRODOMÉSTICOS / MANTENCIÓN CASA
        gasto_cat %in% c(
          "Electrodomesticos",
          "Cortina Bano",
          "Filtro Agua",
          "Filtro Piscina Mspa",
          "Mantencion Toyotomi",
          "Limpieza Alfombras",
          "Sopapo",
          "Pila Estufa",
          "Jardinero",
          "Camaras Seguridad M.barrios",
          "Pago Camaras Mb",
          "Tina",
          "Nacho",
          "Lamina",
          "Chromecast",
          "Easy"
        ) ~ "Electrodomesticos/mantencion casa",
        # 8) STREAMING / TELEFONÍA (Netflix / VTR / Crunchyroll / Entel, que ya aglutinaste en gasto_cat)
        gasto_cat %in% c(
          "Streaming/Telefonia"
        ) ~ "Streaming/Telefonia",
        # 9) DONACIONES / REGALOS / APORTES
        gasto_cat %in% c(
          "Basureros",
          "Donacion",
          "Regalo Chocolates",
          "Regalo Matri Chepa",
          "Regalo Matri Cony",
          "Matri Andres Kogan",
          "Rgalo Chepa",
          "Ropa",
          "Ropa Tami",
          "Assistcard Viaje"
        ) ~ "Donaciones/regalos",
        # 10) TODO LO DEMÁS → OTROS
        TRUE ~ "Otros"
      )
    ) %>%
    dplyr::group_by(gastador, fecha, gasto_cat10, .drop=F) %>%
    dplyr::summarise(monto=sum(monto)) %>% 
    dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés")))




Gastos_casa_gastador_cat_gasto %>%
    dplyr::mutate(fecha = as.Date(fecha)) %>%       # por si viene como POSIXct
    dplyr::group_by(gasto_cat10) %>%                     # opcional: quita ítems con 1 obs
    dplyr::filter(n() >= 6) %>%   
    dplyr::ungroup() %>%
ggplot2::ggplot(aes(x = fecha, y = monto, color=as.factor(gastador_nombre))) +
  #stat_summary(geom = "line", fun.y = median, size = 1, alpha=0.5, aes(color="blue")) +
  geom_line(size=1) +
  facet_grid(gasto_cat10~.)+
  geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
  labs(y="Gastos (en miles)",x="Años", subtitle="Interlineado, incorporación de la Diosi; Azul= Tami; Rojo= Andrés") +
  ggtitle( "Figura 6. Gastos Semanales por Gastador e ítem (media) [n>5]") +
  scale_y_continuous(labels = f <- function(x) paste0(x/1000)) + 
  scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
  tsibble::scale_x_yearweek(breaks = "1 year", minor_breaks = "3 months", labels= scales::date_format("%m/%y")) +
  guides(color = F)+
  theme_custom() +
  theme(axis.text.x = element_text(vjust = 0.5,angle = 35)) +
  theme(
    panel.border = element_blank(), 
    #panel.grid.major = element_blank(),
    #panel.grid.minor = element_blank(), 
    panel.grid.minor = element_line(color = "grey90", size = 0.3),
    panel.grid.major = element_line(color = "grey75", size = 0.5),    
    axis.line = element_line(colour = "black")
    )

# Apply MSTL decomposition

Gastos_casa |> 
    dplyr::mutate(fecha= readr::parse_date(fecha, format="%d/%m/%Y")) |> 
    dplyr::arrange(fecha) |>  pull(monto) -> monto_ts
Gastos_casa |> 
    dplyr::mutate(fecha= readr::parse_date(fecha, format="%d/%m/%Y")) |> 
    dplyr::arrange(fecha) |>  pull(fecha) -> fecha_ts

mstl_data_autplt <- forecast::mstl(monto_ts, lambda = "auto",iterate=5000000,start = 
lubridate::decimal_date(as.Date("2019-03-03")))

# Convert the decomposed time series to a data frame
mstl_df <- data.frame(
  Date = as.Date(fecha_ts, format="%d/%m/%Y"),
  Data = as.numeric(mstl_data_autplt[, "Data"]),
  Trend = as.numeric(mstl_data_autplt[, "Trend"]),
  Remainder = as.numeric(mstl_data_autplt[, "Remainder"])
)

# Reshape the data frame for ggplot2
mstl_long <- mstl_df %>%
  dplyr::arrange(Date) %>%
  tidyr::pivot_longer(cols = -Date, names_to = "Component", values_to = "Value")

mstl_long_filtered <- mstl_long %>% dplyr::filter(!(Date %in% (mstl_long %>% dplyr::distinct(Date) %>% top_n(7, Date) %>% pull(Date))))

# Plotting with ggplot2
ggplot(mstl_long_filtered, aes(x = Date, y = Value)) +
  geom_line() +
  theme_bw() + 
  labs(title = "Descomposición MSTL (- 7 days)", x = "Fecha", y = "Valor") +
  scale_x_date(date_breaks = "3 months", date_labels = "%m-%Y") +
  facet_wrap(~ Component, scales = "free_y", ncol = 1) +
  theme(strip.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, hjust = 1))

ts_week_covid<-  
Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
    dplyr::group_by(fecha_week)%>%
    dplyr::summarise(gasto_total=sum(monto,na.rm=T)/1000,min_day=min(day))%>%
    dplyr::ungroup() %>% 
    dplyr::mutate(covid=dplyr::case_when(min_day>=as.Date("2020-03-17")~1,TRUE~0))%>%
    dplyr::mutate(covid=as.factor(covid))%>%
    data.frame()


ts_week_covid$gasto_total_na<-ts_week_covid$gasto_total
post_resp<-ts_week_covid$gasto_total[which(ts_week_covid$covid==1)]
ts_week_covid$gasto_total_na[which(ts_week_covid$covid==1)]<-NA
ts_week_covid$gasto_total[which(ts_week_covid$covid==0)]
##  [1]  98.357   4.780  56.784  50.506  64.483  67.248  49.299  35.786  58.503
## [10]  64.083  20.148  73.476 127.004  81.551  69.599 134.446  58.936  26.145
## [19] 129.927 104.989 130.860  81.893  95.697  64.579 303.471 151.106  49.275
## [28]  76.293  33.940  83.071 119.512  20.942  58.055  71.728  44.090  33.740
## [37]  59.264  77.410  60.831  63.376  48.754 235.284  29.604 115.143  72.419
## [46]   5.980  80.063 149.178  69.918 107.601  72.724  63.203  99.681 130.309
## [55] 195.898 112.066
# 1) Create corpus
corpus <- tm::Corpus(tm::VectorSource(Gastos_casa$obs))

# 2) Preprocess text
d <- corpus |>
  tm::tm_map(tm::content_transformer(tolower)) |>
  tm::tm_map(tm::stripWhitespace) |>
  tm::tm_map(tm::removePunctuation) |>
  tm::tm_map(tm::removeNumbers) |>
  tm::tm_map(tm::removeWords, tm::stopwords("spanish")) |>
  tm::tm_map(tm::removeWords, "menos")

# 3) Term-document matrix
tdm <- tm::TermDocumentMatrix(d)

# 4) Convert to matrix
m <- base::as.matrix(tdm)

# 5) Frequencies
v  <- base::sort(base::rowSums(m), decreasing = TRUE)

df <- base::data.frame(
  word = base::names(v),
  freq = v,
  row.names = NULL
)

# 6) Wordcloud
wordcloud::wordcloud(
  words       = df$word,
  freq        = df$freq,
  max.words   = 100,
  random.order= FALSE,
  rot.per     = 0.35,
  colors      = RColorBrewer::brewer.pal(8, "Dark2"),
  scale       = c(4, 0.5)
)

fit_month_gasto <- Gastos_casa_gastador_cat_gasto %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    # month string YYYY-MM
    fecha_month = base::format(fecha, "%Y-%m")
  ) %>%
  dplyr::mutate(
    # order months from 2019-03 until today (like in your original code)
    fecha_month = base::factor(
      fecha_month,
      levels = base::format(
        base::seq(
          from = base::as.Date("2019-03-03"),
          to   = base::as.Date(base::substr(base::Sys.time(), 1, 10)),
          by   = "1 month"
        ),
        "%Y-%m"
      )
    )
  ) %>%
  dplyr::group_by(fecha_month, gasto_cat10, .drop = FALSE) %>%
  dplyr::summarise(
    gasto_total = base::sum(monto, na.rm = TRUE) / 1000,
    .groups = "drop"
  ) %>%
  base::as.data.frame()

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
current_ym <- base::format(base::Sys.Date(), "%Y-%m")

tabla_gasto_year_item <-
    fit_month_gasto %>%
    dplyr::mutate(
        year = base::substr(base::as.character(fecha_month), 1, 4),
        ym   = base::as.character(fecha_month)
    ) %>%
    # opcional: excluir el mes actual (incompleto)
    dplyr::filter(ym != current_ym) %>%
    # si quieres solo ciertos años:
    # dplyr::filter(year %in% c("2020","2021","2022","2023","2024","2025")) %>%
    dplyr::group_by(gasto_cat10, year) %>%
    dplyr::summarise(
        gasto_prom = base::mean(gasto_total, na.rm = TRUE),
        .groups    = "drop"
    ) %>%
    tidyr::pivot_wider(
        names_from  = year,
        values_from = gasto_prom
    ) %>%
    janitor::adorn_totals()  # agrega fila "Total"

tabla_gasto_year_item |> 
  knitr::kable("markdown",
    caption = "Gasto Promedio Mensual por Ítem y Año (suma por categoría y mes)",
    digits  = 1)
Gasto Promedio Mensual por Ítem y Año (suma por categoría y mes)
gasto_cat10 2019 2020 2021 2022 2023 2024 2025
Comida 208.4 392.9 317.9 312.4 366.0 326.9 359.0
Diosi 84.2 74.2 79.0 53.5 74.4 50.0 50.8
Donaciones/regalos NA 34.2 86.0 49.3 45.0 34.0 NA
Electrodomesticos/mantencion casa NA 57.8 196.4 27.6 43.9 63.4 42.7
Enceres 62.7 78.0 35.5 36.3 36.5 48.0 14.6
Farmacia NA 33.7 33.6 17.0 32.1 NA NA
Otros NA NA 90.0 60.0 5.2 229.5 14.1
Servicios básicos 73.2 57.4 73.4 105.6 85.7 129.3 95.2
Streaming/Telefonia 22.1 29.8 26.1 32.3 25.9 26.3 22.0
Transporte NA NA NA 26.4 32.9 242.2 35.8
Total 450.6 758.1 938.0 720.4 747.7 1149.5 634.2


2. UF Proyectada

Saqué la UF proyectada

#options(max.print=5000)

uf18 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2018.htm")%>% rvest::html_nodes("table")
uf19 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2019.htm")%>% rvest::html_nodes("table")
uf20 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2020.htm")%>% rvest::html_nodes("table")
uf21 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2021.htm")%>% rvest::html_nodes("table")
uf22 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2022.htm")%>% rvest::html_nodes("table")
uf23 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2023.htm")%>% rvest::html_nodes("table")
uf24 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2024.htm")%>% rvest::html_nodes("table")

tryCatch(uf25 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2025.htm")%>% rvest::html_nodes("table"),
    error = function(c) {
      uf24b <<- cbind.data.frame(Día=NA, variable=NA, value=NA)
      
    }
  )

tryCatch(uf25 <-uf25[[length(uf25)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1),
    error = function(c) {
      uf25 <<- cbind.data.frame(Día=NA, variable=NA, value=NA)
    }
)

uf_serie<-
bind_rows(
cbind.data.frame(anio= 2018, uf18[[length(uf18)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2019, uf19[[length(uf19)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2020, uf20[[length(uf20)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2021, uf21[[length(uf21)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2022, uf22[[length(uf22)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2023, uf23[[length(uf23)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2024, uf23[[length(uf24)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2025, uf25)
)

uf_serie_corrected<-
uf_serie %>% 
dplyr::mutate(month=plyr::revalue(tolower(.[[3]]),c("ene" = 1, "feb"=2, "mar"=3, "abr"=4, "may"=5, "jun"=6, "jul"=7, "ago"=8, "sep"=9, "oct"=10, "nov"=11, "dic"=12))) %>% 
  dplyr::mutate(value=stringr::str_trim(value), value= sub("\\.","",value),value= as.numeric(sub("\\,",".",value))) %>% 
  dplyr::mutate(date=paste0(sprintf("%02d", .[[2]])," ",sprintf("%02d",as.numeric(month)),", ",.[[1]]), date3=lubridate::parse_date_time(date,c("%d %m, %Y"),exact=T),date2=date3) %>% 
   na.omit()#%>%  dplyr::filter(is.na(date3))
## Warning: There was 1 warning in `dplyr::mutate()`.
## i In argument: `date3 = lubridate::parse_date_time(date, c("%d %m, %Y"), exact
##   = T)`.
## Caused by warning:
## !  54 failed to parse.
#Day of the month as decimal number (1–31), with a leading space for a single-digit number.
#Abbreviated month name in the current locale on this platform. (Also matches full name on input: in some locales there are no abbreviations of names.)

warning(paste0("number of observations:",nrow(uf_serie_corrected),",  min uf: ",min(uf_serie_corrected$value),",  min date: ",min(uf_serie_corrected $date3 )))
## Warning: number of observations:2921, min uf: 26799.01, min date: 2018-01-01
# 
# uf_proyectado <- readxl::read_excel("uf_proyectado.xlsx") %>% dplyr::arrange(Período) %>% 
#   dplyr::mutate(Período= as.Date(lubridate::parse_date_time(Período, c("%Y-%m-%d"),exact=T)))
ts_uf_proy<-
ts(data = uf_serie_corrected$value, 
   start = as.numeric(as.Date("2018-01-01")), 
   end = as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)])), frequency = 1,
   deltat = 1, ts.eps = getOption("ts.eps"))

fit_tbats <- forecast::tbats(ts_uf_proy)

fr_fit_tbats<-forecast::forecast(fit_tbats, h=298)
# Configurar API Key
if(nchar(Sys.getenv("API_NIXTLA"))<2){
  Sys.setenv("API_NIXTLA"=as.character(read.table(paste0(dirname(getwd()),"/nixtlar_api_key.txt"), quote="\"", comment.char="")))
}
if(nchar(Sys.getenv("API_NIXTLA"))<2){
  try(Sys.setenv("API_NIXTLA"=as.character(read.table(paste0(gsub("/gastos","",dirname(rstudioapi::getActiveDocumentContext()$path)),"/nixtlar_api_key.txt"), quote="\"", comment.char=""))))
}

try(nixtlar::nixtla_set_api_key(Sys.getenv("NIXTLA")))
## API key has been set for the current session.
if(nchar(Sys.getenv("NIXTLA"))<2){
  nixtlar::nixtla_set_api_key(Sys.getenv("API_NIXTLA"))
}
## API key has been set for the current session.
# Preparar datos en formato requerido por TimeGPT
uf_timegpt <- uf_serie_corrected %>%
    dplyr::rename(ds = date3, y = value) %>%
    dplyr::mutate(ds = format(ds, "%Y-%m-%d")) %>%
    dplyr::mutate(unique_id = "serie_1")%>%
    dplyr::select(unique_id, ds, y)

# Realizar pronóstico con TimeGPT
timegpt_fcst <- nixtlar::nixtla_client_forecast(
  uf_timegpt,
  h = 298,               # 298 días a pronosticar
  freq = "D",            # Frecuencia diaria
  add_history = FALSE,     # Incluir datos históricos en el output
  level = c(80,95),
  model=  "timegpt-1-long-horizon", 
  clean_ex_first = TRUE
)
## The specified horizon h exceeds the model horizon. This may lead to less accurate forecasts. Please consider using a smaller horizon.
# The Conflict: The API endpoint for the long-horizon model likely does not support generating "fitted values" for the historical input data, causing the server to return "Unprocessable Entity" (422).

# 1. Convertir 'ds' a fecha en ambas tablas
uf_timegpt <- uf_timegpt %>% 
    mutate(ds = as.Date(ds))

timegpt_fcst <- timegpt_fcst %>% 
    mutate(ds = as.Date(ds))

# 2. Combinar los datos históricos y el pronóstico
full_data <- bind_rows(
    uf_timegpt %>% mutate(type = "Histórico"),
    timegpt_fcst %>% mutate(type = "Pronóstico")
)


# Ensure dates are Date objects
uf_timegpt <- uf_timegpt %>% mutate(ds = as.Date(ds))
timegpt_fcst <- timegpt_fcst %>% mutate(ds = as.Date(ds))

ggplot() +
    # --- FORECAST LAYERS (Map y to 'TimeGPT') ---
    # 95% Confidence Interval
    geom_ribbon(data = timegpt_fcst, 
                aes(x = ds, ymin = `TimeGPT-lo-95`, ymax = `TimeGPT-hi-95`), 
                fill = "#4B9CD3", alpha = 0.2) +
    # 80% Confidence Interval
    geom_ribbon(data = timegpt_fcst, 
                aes(x = ds, ymin = `TimeGPT-lo-80`, ymax = `TimeGPT-hi-80`), 
                fill = "#4B9CD3", alpha = 0.3) +
    # Forecast Line
    geom_line(data = timegpt_fcst, 
              aes(x = ds, y = TimeGPT, color = "Pronóstico"), size = 1) +

    # --- HISTORICAL LAYER (Map y to 'y') ---
    geom_line(data = uf_timegpt, 
              aes(x = ds, y = y, color = "Histórico"), size = 1) +

    # --- STYLING ---
    geom_vline(xintercept = max(uf_timegpt$ds), 
               linetype = "dashed", color = "red", size = 0.8) +
    scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
    scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
    scale_color_manual(name = "Leyenda", 
                       values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")) +
    # Configuración del eje x
    scale_x_date(
        date_breaks = "3 months",  # Reduce la frecuencia de las etiquetas
        date_labels = "%b %Y",  # Formato de etiquetas (mes y año)
    ) +
    # Configuración del eje y
    scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
    # Configuración de colores
    scale_color_manual(
        name = "Leyenda",
        values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")
    ) +
    # Títulos y subtítulos
    labs(
        title = "Pronóstico de UF, Serie Temporal con TimeGPT",
        subtitle = "Intervalos de confianza al 80% (más oscuro) y 95% (más claro)",
        x = "Fecha",
        y = "Valor",
        color = "Leyenda"
    ) +
    # Tema y estilos
    theme_minimal() +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        legend.position = "bottom",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
    )
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

library(prophet)
## Warning: package 'prophet' was built under R version 4.4.3
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.4.3
## Loading required package: rlang
## Warning: package 'rlang' was built under R version 4.4.3
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
## The following object is masked from 'package:sparklyr':
## 
##     invoke
## The following object is masked from 'package:data.table':
## 
##     :=
  model <- prophet(
  cbind.data.frame(ds= as.Date(uf_timegpt$ds), y=uf_timegpt$y),
  # Trend flexibility
  growth = "linear",
  changepoint.prior.scale = 0.05,  # Reduced for smoother trend
  n.changepoints = 50,  # Increased from default 25
  
  # Seasonality
  yearly.seasonality = TRUE,
  weekly.seasonality = TRUE,
  daily.seasonality = FALSE,  # Disabled for daily data
  seasonality.mode = "additive",
  seasonality.prior.scale = 15,  # Increased to capture stronger seasonality
  
  # Holidays (if applicable)
  # holidays = generated_holidays  # Create with add_country_holidays()
  
  # Uncertainty intervals
  interval.width = 0.95,
  uncertainty.samples = 1000
)
future <- make_future_dataframe(model, periods = 298, include_history = T)
forecast <- predict(model, future)
forecast <- forecast[, c("ds", "yhat", "yhat_lower", "yhat_upper")]
forecast$pred <- ifelse(forecast$ds > max(uf_timegpt$ds), 1,0)
## Warning in check_tzones(e1, e2): 'tzone' attributes are inconsistent
forecast$ds <- as.Date(forecast$ds)

ggplot(forecast, aes(x = ds, y = yhat)) +
  geom_ribbon(aes(ymin = yhat_lower, ymax = yhat_upper), 
              fill = "#9ecae1", alpha = 0.4) +
  geom_line(color = "#08519c", linewidth = 0.8) +
  geom_vline(xintercept = max(uf_timegpt$ds), color = "red", linetype = "dashed", linewidth=1) +
  scale_x_date(date_breaks = "6 months", date_labels = "%y %b") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Valores predichos (95%IC)",
      # subtitle = "March 10, 2025 - May 7, 2025",
       x = "Fecha",
       y = "Valor",
      # caption = "Source: Prophet Forecast Model"
      ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray50"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    plot.caption = element_text(color = "gray30")
  )

# 1. Ensure dates are strictly Date objects
history_df <- uf_timegpt %>% mutate(ds = as.Date(ds))
fcst_df    <- timegpt_fcst %>% mutate(ds = as.Date(ds))

# 2. Plotting (Notice we don't bind_rows, we layer them)
ggplot() +
    # --- FORECAST LAYERS ---
    # 95% Confidence Interval
    geom_ribbon(data = fcst_df, 
                aes(x = ds, ymin = `TimeGPT-lo-95`, ymax = `TimeGPT-hi-95`), 
                fill = "#4B9CD3", alpha = 0.2) +
    # 80% Confidence Interval
    geom_ribbon(data = fcst_df, 
                aes(x = ds, ymin = `TimeGPT-lo-80`, ymax = `TimeGPT-hi-80`), 
                fill = "#4B9CD3", alpha = 0.3) +
    # Forecast Line (Map y to the 'TimeGPT' column)
    geom_line(data = fcst_df, 
              aes(x = ds, y = TimeGPT, color = "Pronóstico"), size = 1) +

    # --- HISTORY LAYER ---
    # History Line (Map y to the 'y' column)
    geom_line(data = history_df, 
              aes(x = ds, y = y, color = "Histórico"), size = 1) +

    # --- STYLING ---
    geom_vline(xintercept = max(history_df$ds), 
               linetype = "dashed", color = "red", size = 0.8) +
    scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
    scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
    scale_color_manual(name = "Leyenda", 
                       values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")) +
    labs(
        title = "Pronóstico de Serie Temporal con TimeGPT",
        subtitle = "Modelo: timegpt-1-long-horizon",
        x = "Fecha", y = "Valor"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(color = "gray50"),
      axis.text.x = element_text(angle = 45, hjust = 1),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      plot.caption = element_text(color = "gray30")
    )+
      theme(legend.position = "bottom")

La proyección de la UF a 298 días más 2025-12-31 00:04:58 sería de: 26.644 pesos// Percentil 95% más alto proyectado: 35.062,81

Según TimeGPT: La proyección de la UF a 298 días más 2026-10-25 sería de: 40.226,56 pesos// Percentil 80% más alto proyectado: 41.542,98 pesos// Percentil 95% más alto proyectado: 41.902,12

Según prophet: La proyección de la UF a 298 días más 2026-10-25 sería de: 42.543 pesos// Percentil 95% más alto proyectado: 50.362

Ahora con un modelo ARIMA automático


arima_optimal_uf = forecast::auto.arima(ts_uf_proy)

  autoplotly::autoplotly(forecast::forecast(arima_optimal_uf, h=298), ts.colour = "darkred",
           predict.colour = "blue", predict.linetype = "dashed")%>% 
  plotly::layout(showlegend = F, 
          yaxis = list(title = "Gastos"),
         xaxis = list(
    title="Fecha",
      ticktext = as.list(seq(from = as.Date("2018-01-01"), 
                                  to = as.Date("2018-01-01")+length(fit_tbats$fitted.values)+298, by = 90)), 
      tickvals = as.list(seq(from = as.numeric(as.Date("2018-01-01")), 
                             to = as.numeric(as.Date("2018-01-01"))+length(fit_tbats$fitted.values)+298, by = 90)),
      tickmode = "array",
    tickangle = 90
    ))
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## i Please use tidy evaluation idioms with `aes()`.
## i See also `vignette("ggplot2-in-packages")` for more information.
## i The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fr_fit_tbats_uf<-forecast::forecast(arima_optimal_uf, h=298)
dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats)),variable) %>% dplyr::summarise(max=max(value)) %>% 
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_uf)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>% 
  dplyr::mutate(variable=factor(variable,levels=c("Lo.95","Lo.80","Point.Forecast","Hi.80","Hi.95"))) %>% 
  dplyr::arrange(variable) %>% 
  knitr::kable(format="markdown", caption="Tabla. Estimación UF (de aquí a 298 días) según cálculos de gastos mensuales",
               col.names= c("Item","UF Proyectada (TBATS)","UF Proyectada (ARIMA)"))
## No id variables; using all as measure variables
## No id variables; using all as measure variables
Tabla. Estimación UF (de aquí a 298 días) según cálculos de gastos mensuales
Item UF Proyectada (TBATS) UF Proyectada (ARIMA)
Lo.95 26261.74 26322.41
Lo.80 26393.55 26487.38
Point.Forecast 26644.35 26799.01
Hi.80 31444.73 32178.58
Hi.95 34326.70 35026.36


3. Gastos proyectados

Lo haré en base a 2 cálculos: el gasto semanal y el gasto mensual en base a mis gastos desde marzo de 2019. La primera proyección la hice añadiendo el precio del arriendo mensual y partiendo en 2 (porque es con yo y Tami). No se incluye el último mes.

Gastos_casa_nvo <- readr::read_csv(as.character(path_sec),
                               col_names = c("Tiempo", "gasto", "fecha", "obs", "monto", "gastador",
                                             "link"),skip=1) %>% 
              dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
              dplyr::mutate(fecha_month=strftime(fecha, format = "%Y-%m")) %>%
              dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))

Gastos_casa_m <-
Gastos_casa_nvo %>% dplyr::group_by(fecha_month)%>%
              dplyr::summarise(gasto_total=(sum(monto)+500000)/1000,fecha=first(fecha))%>%
              data.frame()
  

uf_serie_corrected_m <-
uf_serie_corrected %>% dplyr::mutate(ano_m=paste0(anio,"-",sprintf("%02d",as.numeric(month)))) %>%  dplyr::group_by(ano_m)%>%
              dplyr::summarise(uf=(mean(value))/1000,fecha=first(date3))%>%
              data.frame() %>% 
  dplyr::filter(fecha>="2019-02-28")
#Error: Error in standardise_path(file) : object 'enlace_gastos' not found

ts_uf_serie_corrected_m<-
ts(data = uf_serie_corrected_m$uf[-length(uf_serie_corrected_m$uf)], 
   start = 1, 
   end = nrow(uf_serie_corrected_m), 
   frequency = 1,
   deltat = 1, ts.eps = getOption("ts.eps"))

ts_gastos_casa_m<-
ts(data = Gastos_casa_m$gasto_total[-length(Gastos_casa_m$gasto_total)], 
   start = 1, 
   end = nrow(Gastos_casa_m), 
   frequency = 1,
   deltat = 1, ts.eps = getOption("ts.eps"))

fit_tbats_m <- forecast::tbats(ts_gastos_casa_m)

seq_dates<-format(seq(as.Date("2019/03/01"), by = "month", length = dim(Gastos_casa_m)[1]+12), "%m\n'%y")

autplo2t<-
  autoplotly::autoplotly(forecast::forecast(fit_tbats_m, h=12), ts.colour = "darkred",
           predict.colour = "blue", predict.linetype = "dashed")%>% 
  plotly::layout(showlegend = F, 
          yaxis = list(title = "Gastos (en miles)+ Arriendo"),
         xaxis = list(
    title="Fecha",
      ticktext = as.list(seq_dates[seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)]), 
      tickvals = as.list(seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)),
      tickmode = "array"#"array"
    )) 

autplo2t

Ahora asumiendo un modelo ARIMA, e incluimos como regresor al precio de la UF.

paste0("Optimo pero sin regresor")
## [1] "Optimo pero sin regresor"
arima_optimal = forecast::auto.arima(ts_gastos_casa_m)
arima_optimal
## Series: ts_gastos_casa_m 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1       mean
##       0.4363  1036.0220
## s.e.  0.1004    37.9035
## 
## sigma^2 = 39420:  log likelihood = -556.02
## AIC=1118.04   AICc=1118.34   BIC=1125.29
paste0("Optimo pero con regresor")
## [1] "Optimo pero con regresor"
arima_optimal2 = forecast::auto.arima(ts_gastos_casa_m, xreg=as.numeric(ts_uf_serie_corrected_m[1:(length(Gastos_casa_m$gasto_total))]))
arima_optimal2
## Series: ts_gastos_casa_m 
## Regression with ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1  intercept     xreg
##       0.4409   542.3021  15.2093
## s.e.  0.1033   302.0364   9.1269
## 
## sigma^2 = 37720:  log likelihood = -547.5
## AIC=1103.01   AICc=1103.53   BIC=1112.64
forecast_uf<-
cbind.data.frame(fecha=as.Date(seq(as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)])),(as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)]))+299),by=1), origin = "1970-01-01"),forecast::forecast(fit_tbats, h=300)) %>% 
  dplyr::mutate(ano_m=stringr::str_extract(fecha,".{7}")) %>% 
  dplyr::group_by(ano_m)%>%
              dplyr::summarise(uf=(mean(`Hi 95`,na.rm=T))/1000,fecha=first(fecha))%>%
            data.frame()
autplo2t2<-
  autoplotly::autoplotly(forecast::forecast(arima_optimal2,xreg=c(forecast_uf$uf[1],forecast_uf$uf), h=12), ts.colour = "darkred",
           predict.colour = "blue", predict.linetype = "dashed")%>% 
  plotly::layout(showlegend = F, 
          yaxis = list(title = "Gastos (en miles)"),
         xaxis = list(
    title="Fecha",
      ticktext = as.list(seq_dates[seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)]), 
      tickvals = as.list(seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)),
      tickmode = "array"#"array"
    )) 

autplo2t2
fr_fit_tbats_m<-forecast::forecast(fit_tbats_m, h=12)
fr_fit_tbats_m2<-forecast::forecast(arima_optimal, h=12)
fr_fit_tbats_m3<-forecast::forecast(arima_optimal2, h=12,xreg=c(forecast_uf$uf[1],forecast_uf$uf))

dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m3)),variable) %>% dplyr::summarise(max=max(value)), dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m2)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>% 
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>% 
  dplyr::mutate(variable=factor(variable,levels=c("Lo.95","Lo.80","Point.Forecast","Hi.80","Hi.95"))) %>% 
  dplyr::arrange(variable) %>% 
  knitr::kable(format="markdown", caption="Estimación en miles de la plata a gastar en el futuro (de aquí a 12 meses) según cálculos de gastos mensuales",
               col.names= c("Item","Modelo ARIMA con regresor (UF)","Modelo ARIMA sin regresor","Modelo TBATS")) 
## No id variables; using all as measure variables
## No id variables; using all as measure variables
## No id variables; using all as measure variables
Estimación en miles de la plata a gastar en el futuro (de aquí a 12 meses) según cálculos de gastos mensuales
Item Modelo ARIMA con regresor (UF) Modelo ARIMA sin regresor Modelo TBATS
Lo.95 637.9351 603.5377 657.6140
Lo.80 784.7300 753.2309 759.2202
Point.Forecast 1062.0321 1036.0081 994.2818
Hi.80 1339.3343 1318.7854 1314.7389
Hi.95 1486.1292 1468.4786 1522.8554


4. Gastos mensuales (resumen manual)

path_sec2<- paste0("https://docs.google.com/spreadsheets/d/",Sys.getenv("SUPERSECRET"),"/export?format=csv&id=",Sys.getenv("SUPERSECRET"),"&gid=847461368")

Gastos_casa_mensual_2022 <- readr::read_csv(as.character(path_sec2),
                #col_names = c("Tiempo", "gasto", "fecha", "obs", "monto", "gastador","link"),
                skip=0)
## Rows: 83 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): mes_ano
## dbl (3): n, Tami, Andrés
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Gastos_casa_mensual_2022,5) %>% 
  knitr::kable("markdown",caption="Resumen mensual, primeras 5 observaciones")
Resumen mensual, primeras 5 observaciones
n mes_ano Tami Andrés
1 marzo_2019 175533 68268
2 abril_2019 152640 55031
3 mayo_2019 152985 192219
4 junio_2019 291067 84961
5 julio_2019 241389 205893


(
Gastos_casa_mensual_2022 %>% 
    reshape2::melt(id.var=c("n","mes_ano")) %>%
  dplyr::mutate(gastador=as.factor(variable)) %>% 
  dplyr::select(-variable) %>% 
 ggplot2::ggplot(aes(x = n, y = value, color=gastador)) +
  scale_color_manual(name="Gastador", values=c("red", "blue"))+
  geom_line(size=1) +
  #geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
  labs(y="Gastos (en miles)",x="Meses", subtitle="Azul= Tami; Rojo= Andrés") +
  ggtitle( "Gastos Mensuales (total manual)") +
  scale_y_continuous(labels = f <- function(x) paste0(x/1000)) + 
#  scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
#  scale_x_yearweek(breaks = "1 month", minor_breaks = "1 week", labels=date_format("%m/%y")) +
 # guides(color = F)+
  theme_custom() +
  theme(axis.text.x = element_text(vjust = 0.5,angle = 35)) +
  theme(
    panel.border = element_blank(), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), 
    axis.line = element_line(colour = "black")
    )
) %>% ggplotly()
Gastos_casa_mensual_2022$mes_ano <- gsub("marzo", "Mar", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("abril", "Apr", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("mayo", "May", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("junio", "Jun", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("julio", "Jul", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("agosto", "Aug", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("septiembre", "Sep", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("octubre", "Oct", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("noviembre", "Nov", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("diciembre", "Dec", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("enero", "Jan", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("febrero", "Feb", Gastos_casa_mensual_2022$mes_ano)

Gastos_casa_mensual_2022<- dplyr::filter(Gastos_casa_mensual_2022, !is.na(Tami))

Gastos_casa_mensual_2022$mes_ano <- parse_date_time(Gastos_casa_mensual_2022$mes_ano, "%b_%Y")

Gastos_casa_mensual_2022$mes_ano <- as.Date(as.character(Gastos_casa_mensual_2022$mes_ano))

Gastos_casa_mensual_2022_timegpt <- Gastos_casa_mensual_2022 %>%
  mutate(value = Tami + Andrés) %>%
  rename(ds = mes_ano, y = value) %>%
  mutate(#ds= format(ds, "%Y-%m"),
         unique_id = "1") %>% #it is only one series
  select(unique_id, ds, y)

#Convertir la base de UF a mensual
uf_timegpt_my <- uf_serie_corrected %>%
  dplyr::rename(ds = date3, y = value) %>%
  dplyr::mutate(ds = format(ds, "%Y-%m-%d")) %>%
  dplyr::mutate(unique_id = "serie_1")%>%
  dplyr::select(unique_id, ds, y) %>%
  mutate(ds = ymd(ds)) %>%  # Convert 'ds' to Date
  mutate(month = month(ds), year = year(ds)) %>%  # Extract month and year
  group_by(month, year) %>%  # Group by month and year
  summarise(average_y = mean(y))%>%  # Calculate average y
  mutate(ds = as.Date(paste0(year,"-",month, "-01")))%>%
  ungroup()%>%
  select(ds, uf=average_y)

Gastos_casa_mensual_2022_timegpt_ex<-
Gastos_casa_mensual_2022_timegpt |> 
  dplyr::left_join(uf_timegpt_my, by=c("ds"="ds")) 

#Historical Exogenous Variables: These should be included in the input data immediately following the id_col, ds, and y columns
gastos_timegpt_fcst <- nixtlar::nixtla_client_forecast(
  Gastos_casa_mensual_2022_timegpt_ex,
  h = 12,
  freq = "M",  # Monthly frequency
  add_history = F,
  level = c(80, 95),
  model = "timegpt-1",#"timegpt-1-long-horizon",
  clean_ex_first = TRUE
)

# Convert 'ds' to Date format in both tables
Gastos_casa_mensual_2022_timegpt_corr <- Gastos_casa_mensual_2022_timegpt %>%
  mutate(ds = as.Date(paste0(ds, "-01")))  # Add day to make it a complete date

gastos_timegpt_fcst <- gastos_timegpt_fcst %>%
  mutate(ds = as.Date(paste0(ds, "-01")))  # Add day to make it a complete date

# Combine historical and forecast data
full_data_gastos <- bind_rows(
  Gastos_casa_mensual_2022_timegpt_corr %>% mutate(type = "Histórico"),
  gastos_timegpt_fcst %>% mutate(type = "Pronóstico")
)

full_data_gastos |> 
  dplyr::mutate(y= ifelse(is.na(y),TimeGPT/1000, y/1000)) |> 
# Visualize results
ggplot(aes(x = ds, y = y)) +
  geom_ribbon(aes(ymin = `TimeGPT-lo-95`/1000, ymax = `TimeGPT-hi-95`/1000),
              fill = "#4B9CD3", alpha = 0.2) +
  geom_ribbon(aes(ymin = `TimeGPT-lo-80`/1000, ymax = `TimeGPT-hi-80`/1000),
              fill = "#4B9CD3", alpha = 0.3) +
  geom_line(aes(color = type), linewidth = 1.5) +
  geom_vline(xintercept = max(filter(full_data_gastos, type == "Histórico")$ds),
             linetype = "dashed", color = "red", linewidth = 0.8) +
  scale_x_date(
    date_breaks = "3 months",
    date_labels = "%b %Y"
  ) +
  scale_y_continuous(
    name = "Gastos Totales",
    labels = scales::comma#,
    # breaks = pretty(full_data_gastos$y, n = 10),
    # expand = expansion(mult = c(0.05, 0.05))
  ) +
  scale_color_manual(
    name = "Leyenda",
    values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")
  ) +
  labs(
    title = "Pronóstico de Gastos Mensuales (TimeGPT, ajustando por UF promedio mensual)",
    subtitle = "Intervalos de confianza al 80% (más oscuro) y 95% (más claro)",
    x = "Fecha",
    y = "Gastos Totales",
    color = "Leyenda"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


Session Info

Sys.getenv("R_LIBS_USER")
## [1] "D:\\a\\_temp\\Library"
sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows Server 2022 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## system code page: 65001
## 
## time zone: UTC
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] prophet_1.0        rlang_1.1.6        Rcpp_1.1.0         scales_1.4.0      
##  [5] ggiraph_0.9.2      tidytext_0.4.3     DT_0.34.0          janitor_2.2.1     
##  [9] autoplotly_0.1.4   rvest_1.0.5        plotly_4.11.0      xts_0.14.1        
## [13] forecast_8.24.0    wordcloud_2.6      RColorBrewer_1.1-3 SnowballC_0.7.1   
## [17] tm_0.7-17          NLP_0.3-2          tsibble_1.1.6      lubridate_1.9.4   
## [21] forcats_1.0.1      dplyr_1.1.4        purrr_1.2.0        tidyr_1.3.1       
## [25] tibble_3.3.0       tidyverse_2.0.0    gsynth_1.2.1       sjPlot_2.9.0      
## [29] lattice_0.22-6     GGally_2.4.0       ggplot2_4.0.1      gridExtra_2.3     
## [33] plotrix_3.8-13     sparklyr_1.9.3     httr_1.4.7         readxl_1.4.5      
## [37] zoo_1.8-15         stringr_1.6.0      stringi_1.8.7      DataExplorer_0.8.4
## [41] data.table_1.17.8  reshape2_1.4.5     fUnitRoots_4052.82 plyr_1.8.9        
## [45] readr_2.1.6       
## 
## loaded via a namespace (and not attached):
##   [1] bitops_1.0-9            cellranger_1.1.0        lifecycle_1.0.4        
##   [4] httr2_1.2.1             StanHeaders_2.32.10     doParallel_1.0.17      
##   [7] globals_0.18.0          vroom_1.6.6             MASS_7.3-60.2          
##  [10] crosstalk_1.2.2         magrittr_2.0.4          sass_0.4.10            
##  [13] rmarkdown_2.30          jquerylib_0.1.4         yaml_2.3.10            
##  [16] fracdiff_1.5-3          otel_0.2.0              doRNG_1.8.6.2          
##  [19] askpass_1.2.1           pkgbuild_1.4.8          DBI_1.2.3              
##  [22] abind_1.4-8             quadprog_1.5-8          nnet_7.3-19            
##  [25] rappdirs_0.3.3          sandwich_3.1-1          gdtools_0.4.4          
##  [28] inline_0.3.21           data.tree_1.2.0         tokenizers_0.3.0       
##  [31] listenv_0.10.0          anytime_0.3.12          spatial_7.3-17         
##  [34] parallelly_1.45.1       codetools_0.2-20        xml2_1.5.0             
##  [37] tidyselect_1.2.1        farver_2.1.2            urca_1.3-4             
##  [40] matrixStats_1.5.0       stats4_4.4.0            jsonlite_2.0.0         
##  [43] ellipsis_0.3.2          Formula_1.2-5           iterators_1.0.14       
##  [46] systemfonts_1.3.1       foreach_1.5.2           tools_4.4.0            
##  [49] glue_1.8.0              xfun_0.54               TTR_0.24.4             
##  [52] ggfortify_0.4.19        loo_2.8.0               withr_3.0.2            
##  [55] timeSeries_4041.111     fastmap_1.2.0           openssl_2.3.4          
##  [58] caTools_1.18.3          digest_0.6.39           timechange_0.3.0       
##  [61] R6_2.6.1                lfe_3.1.1               colorspace_2.1-2       
##  [64] networkD3_0.4.1         gtools_3.9.5            generics_0.1.4         
##  [67] fontLiberation_0.1.0    htmlwidgets_1.6.4       ggstats_0.11.0         
##  [70] pkgconfig_2.0.3         gtable_0.3.6            timeDate_4051.111      
##  [73] lmtest_0.9-40           S7_0.2.1                selectr_0.5-0          
##  [76] janeaustenr_1.0.0       htmltools_0.5.8.1       fontBitstreamVera_0.1.1
##  [79] tseries_0.10-58         snakecase_0.11.1        knitr_1.51             
##  [82] rstudioapi_0.17.1       tzdb_0.5.0              nlme_3.1-164           
##  [85] curl_7.0.0              cachem_1.1.0            KernSmooth_2.23-22     
##  [88] parallel_4.4.0          fBasics_4041.97         pillar_1.11.1          
##  [91] vctrs_0.6.5             gplots_3.3.0            slam_0.1-55            
##  [94] dbplyr_2.5.1            xtable_1.8-4            evaluate_1.0.5         
##  [97] mvtnorm_1.3-3           cli_3.6.5               compiler_4.4.0         
## [100] crayon_1.5.3            rngtools_1.5.2          future.apply_1.20.0    
## [103] labeling_0.4.3          rstan_2.32.7            QuickJSR_1.8.1         
## [106] viridisLite_0.4.2       lazyeval_0.2.2          fontquiver_0.2.1       
## [109] Matrix_1.7-0            hms_1.1.4               bit64_4.6.0-1          
## [112] future_1.68.0           nixtlar_0.6.2           extraDistr_1.10.0      
## [115] igraph_2.2.1            RcppParallel_5.1.11-1   bslib_0.9.0            
## [118] quantmod_0.4.28         bit_4.6.0
#save.image("__analisis.RData")

sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))